Guided Project: Designing and Creating a Database

Posted on Wed 08 July 2015 in Projects

In [1]:
import sqlite3
import pandas as pd
import csv

pd.set_option('max_columns', 180)
pd.set_option('max_rows', 200000)
pd.set_option('max_colwidth', 5000)

Getting to Know the Data

In [2]:
log = pd.read_csv("game_log.csv",low_memory=False)
print(log.shape)
log.head()
(171907, 161)
Out[2]:
date number_of_game day_of_week v_name v_league v_game_number h_name h_league h_game_number v_score h_score length_outs day_night completion forefeit protest park_id attendance length_minutes v_line_score h_line_score v_at_bats v_hits v_doubles v_triples v_homeruns v_rbi v_sacrifice_hits v_sacrifice_flies v_hit_by_pitch v_walks v_intentional_walks v_strikeouts v_stolen_bases v_caught_stealing v_grounded_into_double v_first_catcher_interference v_left_on_base v_pitchers_used v_individual_earned_runs v_team_earned_runs v_wild_pitches v_balks v_putouts v_assists v_errors v_passed_balls v_double_plays v_triple_plays h_at_bats h_hits h_doubles h_triples h_homeruns h_rbi h_sacrifice_hits h_sacrifice_flies h_hit_by_pitch h_walks h_intentional_walks h_strikeouts h_stolen_bases h_caught_stealing h_grounded_into_double h_first_catcher_interference h_left_on_base h_pitchers_used h_individual_earned_runs h_team_earned_runs h_wild_pitches h_balks h_putouts h_assists h_errors h_passed_balls h_double_plays h_triple_plays hp_umpire_id hp_umpire_name 1b_umpire_id 1b_umpire_name 2b_umpire_id 2b_umpire_name 3b_umpire_id 3b_umpire_name lf_umpire_id lf_umpire_name rf_umpire_id rf_umpire_name v_manager_id v_manager_name h_manager_id h_manager_name winning_pitcher_id winning_pitcher_name losing_pitcher_id losing_pitcher_name saving_pitcher_id saving_pitcher_name winning_rbi_batter_id winning_rbi_batter_id_name v_starting_pitcher_id v_starting_pitcher_name h_starting_pitcher_id h_starting_pitcher_name v_player_1_id v_player_1_name v_player_1_def_pos v_player_2_id v_player_2_name v_player_2_def_pos v_player_3_id v_player_3_name v_player_3_def_pos v_player_4_id v_player_4_name v_player_4_def_pos v_player_5_id v_player_5_name v_player_5_def_pos v_player_6_id v_player_6_name v_player_6_def_pos v_player_7_id v_player_7_name v_player_7_def_pos v_player_8_id v_player_8_name v_player_8_def_pos v_player_9_id v_player_9_name v_player_9_def_pos h_player_1_id h_player_1_name h_player_1_def_pos h_player_2_id h_player_2_name h_player_2_def_pos h_player_3_id h_player_3_name h_player_3_def_pos h_player_4_id h_player_4_name h_player_4_def_pos h_player_5_id h_player_5_name h_player_5_def_pos h_player_6_id h_player_6_name h_player_6_def_pos h_player_7_id h_player_7_name h_player_7_def_pos h_player_8_id h_player_8_name h_player_8_def_pos h_player_9_id h_player_9_name h_player_9_def_pos additional_info acquisition_info
0 18710504 0 Thu CL1 NaN 1 FW1 NaN 1 0 2 54.0 D NaN NaN NaN FOR01 200.0 120.0 000000000 010010000 30.0 4.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 NaN 6.0 1.0 NaN -1.0 NaN 4.0 1.0 1.0 1.0 0.0 0.0 27.0 9.0 0.0 3.0 0.0 0.0 31.0 4.0 1.0 0.0 0.0 2.0 0.0 0.0 0.0 1.0 NaN 0.0 0.0 NaN -1.0 NaN 3.0 1.0 0.0 0.0 0.0 0.0 27.0 3.0 3.0 1.0 1.0 0.0 boakj901 John Boake NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN paboc101 Charlie Pabor lennb101 Bill Lennon mathb101 Bobby Mathews prata101 Al Pratt NaN NaN NaN NaN prata101 Al Pratt mathb101 Bobby Mathews whitd102 Deacon White 2.0 kimbg101 Gene Kimball 4.0 paboc101 Charlie Pabor 7.0 allia101 Art Allison 8.0 white104 Elmer White 9.0 prata101 Al Pratt 1.0 sutte101 Ezra Sutton 5.0 carlj102 Jim Carleton 3.0 bassj101 John Bass 6.0 selmf101 Frank Sellman 5.0 mathb101 Bobby Mathews 1.0 foraj101 Jim Foran 3.0 goldw101 Wally Goldsmith 6.0 lennb101 Bill Lennon 2.0 caret101 Tom Carey 4.0 mince101 Ed Mincher 7.0 mcdej101 James McDermott 8.0 kellb105 Bill Kelly 9.0 NaN Y
1 18710505 0 Fri BS1 NaN 1 WS3 NaN 1 20 18 54.0 D NaN NaN NaN WAS01 5000.0 145.0 107000435 640113030 41.0 13.0 1.0 2.0 0.0 13.0 0.0 0.0 0.0 18.0 NaN 5.0 3.0 NaN -1.0 NaN 12.0 1.0 6.0 6.0 1.0 0.0 27.0 13.0 10.0 1.0 2.0 0.0 49.0 14.0 2.0 0.0 0.0 11.0 0.0 0.0 0.0 10.0 NaN 2.0 1.0 NaN -1.0 NaN 14.0 1.0 7.0 7.0 0.0 0.0 27.0 20.0 10.0 2.0 3.0 0.0 dobsh901 Henry Dobson NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN wrigh101 Harry Wright younn801 Nick Young spala101 Al Spalding braia102 Asa Brainard NaN NaN NaN NaN spala101 Al Spalding braia102 Asa Brainard wrigg101 George Wright 6.0 barnr102 Ross Barnes 4.0 birdd102 Dave Birdsall 9.0 mcvec101 Cal McVey 2.0 wrigh101 Harry Wright 8.0 goulc101 Charlie Gould 3.0 schah101 Harry Schafer 5.0 conef101 Fred Cone 7.0 spala101 Al Spalding 1.0 watef102 Fred Waterman 5.0 forcd101 Davy Force 6.0 mille105 Everett Mills 3.0 allid101 Doug Allison 2.0 hallg101 George Hall 7.0 leona101 Andy Leonard 4.0 braia102 Asa Brainard 1.0 burrh101 Henry Burroughs 9.0 berth101 Henry Berthrong 8.0 HTBF Y
2 18710506 0 Sat CL1 NaN 2 RC1 NaN 1 12 4 54.0 D NaN NaN NaN RCK01 1000.0 140.0 610020003 010020100 49.0 11.0 1.0 1.0 0.0 8.0 0.0 0.0 0.0 0.0 NaN 1.0 0.0 NaN -1.0 NaN 10.0 1.0 0.0 0.0 2.0 0.0 27.0 12.0 8.0 5.0 0.0 0.0 36.0 7.0 2.0 1.0 0.0 2.0 0.0 0.0 0.0 0.0 NaN 3.0 5.0 NaN -1.0 NaN 5.0 1.0 3.0 3.0 1.0 0.0 27.0 12.0 13.0 3.0 0.0 0.0 mawnj901 J.H. Manny NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN paboc101 Charlie Pabor hasts101 Scott Hastings prata101 Al Pratt fishc102 Cherokee Fisher NaN NaN NaN NaN prata101 Al Pratt fishc102 Cherokee Fisher whitd102 Deacon White 2.0 kimbg101 Gene Kimball 4.0 paboc101 Charlie Pabor 7.0 allia101 Art Allison 8.0 white104 Elmer White 9.0 prata101 Al Pratt 1.0 sutte101 Ezra Sutton 5.0 carlj102 Jim Carleton 3.0 bassj101 John Bass 6.0 mackd101 Denny Mack 3.0 addyb101 Bob Addy 4.0 fishc102 Cherokee Fisher 1.0 hasts101 Scott Hastings 8.0 ham-r101 Ralph Ham 5.0 ansoc101 Cap Anson 2.0 sagep101 Pony Sager 6.0 birdg101 George Bird 7.0 stirg101 Gat Stires 9.0 NaN Y
3 18710508 0 Mon CL1 NaN 3 CH1 NaN 1 12 14 54.0 D NaN NaN NaN CHI01 5000.0 150.0 101403111 077000000 46.0 15.0 2.0 1.0 2.0 10.0 0.0 0.0 0.0 0.0 NaN 1.0 0.0 NaN -1.0 NaN 7.0 1.0 6.0 6.0 0.0 0.0 27.0 15.0 11.0 6.0 0.0 0.0 43.0 11.0 2.0 0.0 0.0 8.0 0.0 0.0 0.0 4.0 NaN 2.0 1.0 NaN -1.0 NaN 6.0 1.0 4.0 4.0 0.0 0.0 27.0 14.0 7.0 2.0 0.0 0.0 willg901 Gardner Willard NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN paboc101 Charlie Pabor woodj106 Jimmy Wood zettg101 George Zettlein prata101 Al Pratt NaN NaN NaN NaN prata101 Al Pratt zettg101 George Zettlein whitd102 Deacon White 2.0 kimbg101 Gene Kimball 4.0 paboc101 Charlie Pabor 7.0 allia101 Art Allison 8.0 white104 Elmer White 9.0 prata101 Al Pratt 1.0 sutte101 Ezra Sutton 5.0 carlj102 Jim Carleton 3.0 bassj101 John Bass 6.0 mcatb101 Bub McAtee 3.0 kingm101 Marshall King 8.0 hodec101 Charlie Hodes 2.0 woodj106 Jimmy Wood 4.0 simmj101 Joe Simmons 9.0 folet101 Tom Foley 7.0 duffe101 Ed Duffy 6.0 pinke101 Ed Pinkham 5.0 zettg101 George Zettlein 1.0 NaN Y
4 18710509 0 Tue BS1 NaN 2 TRO NaN 1 9 5 54.0 D NaN NaN NaN TRO01 3250.0 145.0 000002232 101003000 46.0 17.0 4.0 1.0 0.0 6.0 0.0 0.0 0.0 2.0 NaN 0.0 1.0 NaN -1.0 NaN 12.0 1.0 2.0 2.0 0.0 0.0 27.0 12.0 5.0 0.0 1.0 0.0 36.0 9.0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 3.0 NaN 0.0 2.0 NaN -1.0 NaN 7.0 1.0 3.0 3.0 1.0 0.0 27.0 11.0 7.0 3.0 0.0 0.0 leroi901 Isaac Leroy NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN wrigh101 Harry Wright pikel101 Lip Pike spala101 Al Spalding mcmuj101 John McMullin NaN NaN NaN NaN spala101 Al Spalding mcmuj101 John McMullin wrigg101 George Wright 6.0 barnr102 Ross Barnes 4.0 birdd102 Dave Birdsall 9.0 mcvec101 Cal McVey 2.0 wrigh101 Harry Wright 8.0 goulc101 Charlie Gould 3.0 schah101 Harry Schafer 5.0 conef101 Fred Cone 7.0 spala101 Al Spalding 1.0 flync101 Clipper Flynn 9.0 mcgem101 Mike McGeary 2.0 yorkt101 Tom York 8.0 mcmuj101 John McMullin 1.0 kings101 Steve King 7.0 beave101 Edward Beavens 4.0 bells101 Steve Bellan 5.0 pikel101 Lip Pike 3.0 cravb101 Bill Craver 6.0 HTBF Y
In [3]:
log.tail()
Out[3]:
date number_of_game day_of_week v_name v_league v_game_number h_name h_league h_game_number v_score h_score length_outs day_night completion forefeit protest park_id attendance length_minutes v_line_score h_line_score v_at_bats v_hits v_doubles v_triples v_homeruns v_rbi v_sacrifice_hits v_sacrifice_flies v_hit_by_pitch v_walks v_intentional_walks v_strikeouts v_stolen_bases v_caught_stealing v_grounded_into_double v_first_catcher_interference v_left_on_base v_pitchers_used v_individual_earned_runs v_team_earned_runs v_wild_pitches v_balks v_putouts v_assists v_errors v_passed_balls v_double_plays v_triple_plays h_at_bats h_hits h_doubles h_triples h_homeruns h_rbi h_sacrifice_hits h_sacrifice_flies h_hit_by_pitch h_walks h_intentional_walks h_strikeouts h_stolen_bases h_caught_stealing h_grounded_into_double h_first_catcher_interference h_left_on_base h_pitchers_used h_individual_earned_runs h_team_earned_runs h_wild_pitches h_balks h_putouts h_assists h_errors h_passed_balls h_double_plays h_triple_plays hp_umpire_id hp_umpire_name 1b_umpire_id 1b_umpire_name 2b_umpire_id 2b_umpire_name 3b_umpire_id 3b_umpire_name lf_umpire_id lf_umpire_name rf_umpire_id rf_umpire_name v_manager_id v_manager_name h_manager_id h_manager_name winning_pitcher_id winning_pitcher_name losing_pitcher_id losing_pitcher_name saving_pitcher_id saving_pitcher_name winning_rbi_batter_id winning_rbi_batter_id_name v_starting_pitcher_id v_starting_pitcher_name h_starting_pitcher_id h_starting_pitcher_name v_player_1_id v_player_1_name v_player_1_def_pos v_player_2_id v_player_2_name v_player_2_def_pos v_player_3_id v_player_3_name v_player_3_def_pos v_player_4_id v_player_4_name v_player_4_def_pos v_player_5_id v_player_5_name v_player_5_def_pos v_player_6_id v_player_6_name v_player_6_def_pos v_player_7_id v_player_7_name v_player_7_def_pos v_player_8_id v_player_8_name v_player_8_def_pos v_player_9_id v_player_9_name v_player_9_def_pos h_player_1_id h_player_1_name h_player_1_def_pos h_player_2_id h_player_2_name h_player_2_def_pos h_player_3_id h_player_3_name h_player_3_def_pos h_player_4_id h_player_4_name h_player_4_def_pos h_player_5_id h_player_5_name h_player_5_def_pos h_player_6_id h_player_6_name h_player_6_def_pos h_player_7_id h_player_7_name h_player_7_def_pos h_player_8_id h_player_8_name h_player_8_def_pos h_player_9_id h_player_9_name h_player_9_def_pos additional_info acquisition_info
171902 20161002 0 Sun MIL NL 162 COL NL 162 6 4 60.0 D NaN NaN NaN DEN02 27762.0 203.0 0200000202 1100100010 39.0 10.0 4.0 1.0 2.0 6.0 0.0 0.0 1.0 4.0 0.0 12.0 2.0 1.0 0.0 0.0 8.0 7.0 4.0 4.0 1.0 0.0 30.0 12.0 1.0 0.0 0.0 0.0 41.0 13.0 4.0 0.0 1.0 4.0 1.0 0.0 1.0 3.0 0.0 11.0 0.0 1.0 0.0 0.0 12.0 5.0 6.0 6.0 0.0 0.0 30.0 13.0 0.0 0.0 0.0 0.0 barrs901 Scott Barry woodt901 Tom Woodring randt901 Tony Randazzo ortir901 Roberto Ortiz NaN NaN NaN NaN counc001 Craig Counsell weisw001 Walt Weiss thort001 Tyler Thornburg rusic001 Chris Rusin knebc001 Corey Knebel susaa001 Andrew Susac cravt001 Tyler Cravy marqg001 German Marquez villj001 Jonathan Villar 5.0 genns001 Scooter Gennett 4.0 cartc002 Chris Carter 3.0 santd002 Domingo Santana 9.0 pereh001 Hernan Perez 8.0 arcio002 Orlando Arcia 6.0 susaa001 Andrew Susac 2.0 elmoj001 Jake Elmore 7.0 cravt001 Tyler Cravy 1.0 blacc001 Charlie Blackmon 8.0 dahld001 David Dahl 7.0 arenn001 Nolan Arenado 5.0 gonzc001 Carlos Gonzalez 9.0 murpt002 Tom Murphy 2.0 pattj005 Jordan Patterson 3.0 valap001 Pat Valaika 4.0 adamc001 Cristhian Adames 6.0 marqg001 German Marquez 1.0 NaN Y
171903 20161002 0 Sun NYN NL 162 PHI NL 162 2 5 51.0 D NaN NaN NaN PHI13 36935.0 159.0 000001100 00100031x 33.0 8.0 3.0 0.0 0.0 2.0 0.0 0.0 0.0 2.0 0.0 9.0 1.0 1.0 1.0 0.0 6.0 6.0 3.0 3.0 0.0 0.0 24.0 12.0 3.0 1.0 2.0 0.0 33.0 10.0 1.0 0.0 0.0 3.0 0.0 1.0 0.0 2.0 0.0 3.0 0.0 0.0 2.0 0.0 7.0 5.0 2.0 2.0 0.0 0.0 27.0 7.0 0.0 0.0 1.0 0.0 barkl901 Lance Barksdale herna901 Angel Hernandez barrt901 Ted Barrett littw901 Will Little NaN NaN NaN NaN collt801 Terry Collins mackp101 Pete Mackanin murrc002 Colton Murray goede001 Erik Goeddel nerih001 Hector Neris hernc005 Cesar Hernandez ynoag001 Gabriel Ynoa eickj001 Jerad Eickhoff granc001 Curtis Granderson 8.0 cabra002 Asdrubal Cabrera 6.0 brucj001 Jay Bruce 9.0 dudal001 Lucas Duda 3.0 johnk003 Kelly Johnson 4.0 confm001 Michael Conforto 7.0 campe001 Eric Campbell 5.0 plawk001 Kevin Plawecki 2.0 ynoag001 Gabriel Ynoa 1.0 hernc005 Cesar Hernandez 4.0 parej002 Jimmy Paredes 7.0 herro001 Odubel Herrera 8.0 franm004 Maikel Franco 5.0 howar001 Ryan Howard 3.0 ruppc001 Cameron Rupp 2.0 blana001 Andres Blanco 6.0 altha001 Aaron Altherr 9.0 eickj001 Jerad Eickhoff 1.0 NaN Y
171904 20161002 0 Sun LAN NL 162 SFN NL 162 1 7 51.0 D NaN NaN NaN SFO03 41445.0 184.0 000100000 23000002x 30.0 4.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 2.0 0.0 7.0 0.0 0.0 1.0 0.0 4.0 7.0 7.0 7.0 0.0 0.0 24.0 5.0 1.0 0.0 0.0 0.0 39.0 16.0 3.0 1.0 0.0 7.0 0.0 0.0 0.0 4.0 1.0 11.0 2.0 1.0 0.0 0.0 12.0 2.0 1.0 1.0 0.0 0.0 27.0 7.0 0.0 0.0 1.0 0.0 knigb901 Brian Knight westj901 Joe West fleta901 Andy Fletcher danlk901 Kerwin Danley NaN NaN NaN NaN robed001 Dave Roberts bochb002 Bruce Bochy moorm003 Matt Moore maedk001 Kenta Maeda NaN NaN poseb001 Buster Posey maedk001 Kenta Maeda moorm003 Matt Moore kendh001 Howie Kendrick 7.0 turnj001 Justin Turner 5.0 seagc001 Corey Seager 6.0 puigy001 Yasiel Puig 9.0 gonza003 Adrian Gonzalez 3.0 grany001 Yasmani Grandal 2.0 pedej001 Joc Pederson 8.0 utlec001 Chase Utley 4.0 maedk001 Kenta Maeda 1.0 spand001 Denard Span 8.0 beltb001 Brandon Belt 3.0 poseb001 Buster Posey 2.0 pench001 Hunter Pence 9.0 crawb001 Brandon Crawford 6.0 pagaa001 Angel Pagan 7.0 panij002 Joe Panik 4.0 gillc001 Conor Gillaspie 5.0 moorm003 Matt Moore 1.0 NaN Y
171905 20161002 0 Sun PIT NL 162 SLN NL 162 4 10 51.0 D NaN NaN NaN STL10 44615.0 192.0 000020200 00100360x 35.0 9.0 0.0 0.0 1.0 4.0 0.0 0.0 0.0 4.0 0.0 11.0 0.0 1.0 0.0 0.0 8.0 6.0 8.0 8.0 0.0 0.0 24.0 2.0 2.0 0.0 0.0 0.0 36.0 12.0 2.0 0.0 1.0 10.0 0.0 2.0 0.0 4.0 0.0 5.0 0.0 0.0 0.0 0.0 8.0 3.0 4.0 4.0 0.0 0.0 27.0 7.0 0.0 0.0 1.0 0.0 cuzzp901 Phil Cuzzi ticht901 Todd Tichenor vanol901 Larry Vanover marqa901 Alfonso Marquez NaN NaN NaN NaN hurdc001 Clint Hurdle mathm001 Mike Matheny broxj001 Jonathan Broxton nicaj001 Juan Nicasio NaN NaN piscs001 Stephen Piscotty voger001 Ryan Vogelsong waina001 Adam Wainwright jasoj001 John Jaso 3.0 polag001 Gregory Polanco 9.0 mccua001 Andrew McCutchen 8.0 kangj001 Jung Ho Kang 5.0 joycm001 Matt Joyce 7.0 hansa001 Alen Hanson 4.0 fryee001 Eric Fryer 2.0 florp001 Pedro Florimon 6.0 voger001 Ryan Vogelsong 1.0 carpm002 Matt Carpenter 3.0 diaza003 Aledmys Diaz 6.0 moliy001 Yadier Molina 2.0 piscs001 Stephen Piscotty 9.0 peraj001 Jhonny Peralta 5.0 mossb001 Brandon Moss 7.0 gyorj001 Jedd Gyorko 4.0 gricr001 Randal Grichuk 8.0 waina001 Adam Wainwright 1.0 NaN Y
171906 20161002 0 Sun MIA NL 161 WAS NL 162 7 10 51.0 D NaN NaN NaN WAS11 28730.0 216.0 000230020 03023002x 38.0 14.0 1.0 1.0 2.0 7.0 1.0 0.0 0.0 3.0 2.0 10.0 1.0 1.0 1.0 0.0 8.0 7.0 10.0 10.0 1.0 0.0 24.0 11.0 0.0 0.0 1.0 0.0 30.0 10.0 2.0 0.0 1.0 10.0 1.0 1.0 1.0 8.0 0.0 3.0 2.0 0.0 1.0 0.0 7.0 6.0 7.0 7.0 1.0 0.0 27.0 11.0 0.0 0.0 1.0 0.0 tumpj901 John Tumpane porta901 Alan Porter onorb901 Brian O'Nora kellj901 Jeff Kellogg NaN NaN NaN NaN mattd001 Don Mattingly baked002 Dusty Baker schem001 Max Scherzer brica001 Austin Brice melam001 Mark Melancon difow001 Wilmer Difo koeht001 Tom Koehler schem001 Max Scherzer gordd002 Dee Gordon 4.0 telit001 Tomas Telis 2.0 pradm001 Martin Prado 5.0 yelic001 Christian Yelich 8.0 bourj002 Justin Bour 3.0 scrux001 Xavier Scruggs 7.0 hoodd001 Destin Hood 9.0 hecha001 Adeiny Hechavarria 6.0 koeht001 Tom Koehler 1.0 turnt001 Trea Turner 8.0 reveb001 Ben Revere 7.0 harpb003 Bryce Harper 9.0 zimmr001 Ryan Zimmerman 3.0 drews001 Stephen Drew 5.0 difow001 Wilmer Difo 4.0 espid001 Danny Espinosa 6.0 lobaj001 Jose Lobaton 2.0 schem001 Max Scherzer 1.0 NaN Y

It looks like the game log has a record of over 170,000 games. It looks like these games are chronologically ordered and occur between 1871 and 2016.

For each game we have:

  • general information on the game
  • team level stats for each team
  • a list of players from each team, numbered, with their defensive positions
  • the umpires that officiated the game
  • some 'awards', like winning and losing pitcher

We have a game_log_fields.txt file that tell us that the player number corresponds with the order in which they batted.

It's worth noting that there is no natural primary key column for this table.

In [4]:
person = pd.read_csv('person_codes.csv')
print(person.shape)
person.head()
(20494, 7)
Out[4]:
id last first player_debut mgr_debut coach_debut ump_debut
0 aardd001 Aardsma David 04/06/2004 NaN NaN NaN
1 aaroh101 Aaron Hank 04/13/1954 NaN NaN NaN
2 aarot101 Aaron Tommie 04/10/1962 NaN 04/06/1979 NaN
3 aased001 Aase Don 07/26/1977 NaN NaN NaN
4 abada001 Abad Andy 09/10/2001 NaN NaN NaN

This seems to be a list of people with IDs. The IDs look like they match up with those used in the game log. There are debut dates, for players, managers, coaches and umpires. We can see that some people might have been one or more of these roles.

It also looks like coaches and managers are two different things in baseball. After some research, managers are what would be called a 'coach' or 'head coach' in other sports, and coaches are more specialized, like base coaches. It also seems like coaches aren't recorded in the game log.

In [5]:
park = pd.read_csv('park_codes.csv')
print(park.shape)
park.head()
(252, 9)
Out[5]:
park_id name aka city state start end league notes
0 ALB01 Riverside Park NaN Albany NY 09/11/1880 05/30/1882 NL TRN:9/11/80;6/15&9/10/1881;5/16-5/18&5/30/1882
1 ALT01 Columbia Park NaN Altoona PA 04/30/1884 05/31/1884 UA NaN
2 ANA01 Angel Stadium of Anaheim Edison Field; Anaheim Stadium Anaheim CA 04/19/1966 NaN AL NaN
3 ARL01 Arlington Stadium NaN Arlington TX 04/21/1972 10/03/1993 AL NaN
4 ARL02 Rangers Ballpark in Arlington The Ballpark in Arlington; Ameriquest Fl Arlington TX 04/11/1994 NaN AL NaN

This seems to be a list of all baseball parks. There are IDs which seem to match with the game log, as well as names, nicknames, city and league.

In [6]:
team = pd.read_csv('team_codes.csv')
print(team.shape)
team.head()
(150, 8)
Out[6]:
team_id league start end city nickname franch_id seq
0 ALT UA 1884 1884 Altoona Mountain Cities ALT 1
1 ARI NL 1998 0 Arizona Diamondbacks ARI 1
2 BFN NL 1879 1885 Buffalo Bisons BFN 1
3 BFP PL 1890 1890 Buffalo Bisons BFP 1
4 BL1 NaN 1872 1874 Baltimore Canaries BL1 1

This seems to be a list of all teams, with team_ids which seem to match the game log. Interestingly, there is a franch_id, let's take a look at this:

In [7]:
team["franch_id"].value_counts().head()
Out[7]:
BS1    4
TRN    3
LAA    3
SE1    3
BR3    3
Name: franch_id, dtype: int64

We might have franch_id occurring a few times for some teams, let's look at the first one in more detail.

In [8]:
team[team["franch_id"] == 'BS1']
Out[8]:
team_id league start end city nickname franch_id seq
21 BS1 NaN 1871 1875 Boston Braves BS1 1
22 BSN NL 1876 1952 Boston Braves BS1 2
23 MLN NL 1953 1965 Milwaukee Braves BS1 3
24 ATL NL 1966 0 Atlanta Braves BS1 4

It appears that teams move between leagues and cities. The team_id changes when this happens, franch_id (which is probably 'Franchise') helps us tie all of this together.

Defensive Positions

In the game log, each player has a defensive position listed, which seems to be a number between 1-10. Doing some research around this, I found this article which gives us a list of names for each numbered position:

  1. Pitcher
  2. Catcher
  3. 1st Base
  4. 2nd Base
  5. 3rd Base
  6. Shortstop
  7. Left Field
  8. Center Field
  9. Right Field

The 10th position isn't included, it may be a way of describing a designated hitter that does not field. I can find a retrosheet page that indicates that position 0 is used for this, but we don't have any position 0 in our data. I have chosen to make this an 'Unknown Position' so I'm not including data based on a hunch.

Leagues

Wikipedia tells us there are currently two leagues - the American (AL) and National (NL). Let's start by finding out what leagues are listed in the main game log:

In [9]:
log["h_league"].value_counts()
Out[9]:
NL    88867
AL    74712
AA     5039
FL     1243
PL      532
UA      428
Name: h_league, dtype: int64

It looks like most of our games fall into the two current leagues, but that there are four other leagues. Let's write a quick function to get some info on the years of these leagues:

In [10]:
def league_info(league):
    league_games = log[log["h_league"] == league]
    earliest = league_games["date"].min()
    latest = league_games["date"].max()
    print("{} went from {} to {}".format(league,earliest,latest))

for league in log["h_league"].unique():
    league_info(league)
nan went from nan to nan
NL went from 18760422 to 20161002
AA went from 18820502 to 18911006
UA went from 18840417 to 18841019
PL went from 18900419 to 18901004
AL went from 19010424 to 20161002
FL went from 19140413 to 19151003

Now we have some years which will help us do some research. After some googling we come up with:

It also looks like we have about 1000 games where the home team doesn't have a value for league.

Importing Data into SQLite

In [11]:
# These helper functions will be useful as we work
# with the SQLite database from python

DB = "mlb.db"

def run_query(q):
    with sqlite3.connect(DB) as conn:
        return pd.read_sql(q,conn)

def run_command(c):
    with sqlite3.connect(DB) as conn:
        conn.execute('PRAGMA foreign_keys = ON;')
        conn.isolation_level = None
        conn.execute(c)

def show_tables():
    q = '''
    SELECT
        name,
        type
    FROM sqlite_master
    WHERE type IN ("table","view");
    '''
    return run_query(q)
In [12]:
tables = {
    "game_log": log,
    "person_codes": person,
    "team_codes": team,
    "park_codes": park
}

with sqlite3.connect(DB) as conn:    
    for name, data in tables.items():
        conn.execute("DROP TABLE IF EXISTS {};".format(name))
        data.to_sql(name,conn,index=False)
In [13]:
show_tables()
Out[13]:
name type
0 game_log table
1 park_codes table
2 team_codes table
3 person_codes table
In [14]:
c1 = """
ALTER TABLE game_log
ADD COLUMN game_id TEXT;
"""

# try/except loop since ALTER TABLE
# doesn't support IF NOT EXISTS
try:
    run_command(c1)
except:
    pass

c2 = """
UPDATE game_log
SET game_id = date || h_name || number_of_game
/* WHERE prevents this if it has already been done */
WHERE game_id IS NULL; 
"""

run_command(c2)

q = """
SELECT
    game_id,
    date,
    h_name,
    number_of_game
FROM game_log
LIMIT 5;
"""

run_query(q)
Out[14]:
game_id date h_name number_of_game
0 18710504FW10 18710504 FW1 0
1 18710505WS30 18710505 WS3 0
2 18710506RC10 18710506 RC1 0
3 18710508CH10 18710508 CH1 0
4 18710509TRO0 18710509 TRO 0

Looking for Normalization Opportunities

The following are opportunities for normalization of our data:

  • In person_codes, all the debut dates will be able to be reproduced using game log data.
  • In team_codes, the start, end and sequence columns will be able to be reproduced using game log data.
  • In park_codes, the start and end years will be able to be reproduced using game log data. While technically the state is an attribute of the city, we might not want to have a an incomplete city/state table so we will leave this in.
  • There are lots of places in game log where we have a player ID followed by the players name. We will be able to remove this and use the name data in person_codes
  • In game_log, all offensive and defensive stats are repeated for the home team and the visiting team. We could break these out and have a table that lists each game twice, one for each team, and cut out this column repetition.
  • Similarly, in game_log, we have a listing for 9 players on each team with their positions - we can remove these and have one table that tracks player appearances and their positions.
  • We can do a similar thing with the umpires from game_log, instead of listing all four positions as columns, we can put the umpires either in their own table or make one table for players, umpires and managers.
  • We have several awards in game_log like winning pitcher and losing pitcher. We can either break these out into their own table, have a table for awards, or combine the awards in with general appearances like the players and umpires.

Planning a Normalized Schema

The following schema was planned using DbDesigner.net:

schema

Creating Tables Without Foreign Keys

In [15]:
c1 = """
CREATE TABLE IF NOT EXISTS person (
    person_id TEXT PRIMARY KEY,
    first_name TEXT,
    last_name TEXT
);
"""

c2 = """
INSERT OR IGNORE INTO person
SELECT
    id,
    first,
    last
FROM person_codes;
"""

q = """
SELECT * FROM person
LIMIT 5;
"""

run_command(c1)
run_command(c2)
run_query(q)
Out[15]:
person_id first_name last_name
0 aardd001 David Aardsma
1 aaroh101 Hank Aaron
2 aarot101 Tommie Aaron
3 aased001 Don Aase
4 abada001 Andy Abad
In [16]:
c1 = """
CREATE TABLE IF NOT EXISTS park (
    park_id TEXT PRIMARY KEY,
    name TEXT,
    nickname TEXT,
    city TEXT,
    state TEXT,
    notes TEXT
);
"""

c2 = """
INSERT OR IGNORE INTO park
SELECT
    park_id,
    name,
    aka,
    city,
    state,
    notes
FROM park_codes;
"""

q = """
SELECT * FROM park
LIMIT 5;
"""

run_command(c1)
run_command(c2)
run_query(q)
Out[16]:
park_id name nickname city state notes
0 ALB01 Riverside Park None Albany NY TRN:9/11/80;6/15&9/10/1881;5/16-5/18&5/30/1882
1 ALT01 Columbia Park None Altoona PA None
2 ANA01 Angel Stadium of Anaheim Edison Field; Anaheim Stadium Anaheim CA None
3 ARL01 Arlington Stadium None Arlington TX None
4 ARL02 Rangers Ballpark in Arlington The Ballpark in Arlington; Ameriquest Fl Arlington TX None
In [17]:
c1 = """
CREATE TABLE IF NOT EXISTS league (
    league_id TEXT PRIMARY KEY,
    name TEXT
);
"""

c2 = """
INSERT OR IGNORE INTO league
VALUES
    ("NL", "National League"),
    ("AL", "American League"),
    ("AA", "American Association"),
    ("FL", "Federal League"),
    ("PL", "Players League"),
    ("UA", "Union Association")
;
"""

q = """
SELECT * FROM league
"""

run_command(c1)
run_command(c2)
run_query(q)
Out[17]:
league_id name
0 NL National League
1 AL American League
2 AA American Association
3 FL Federal League
4 PL Players League
5 UA Union Association
In [18]:
c1 = "DROP TABLE IF EXISTS appearance_type;"

run_command(c1)

c2 = """
CREATE TABLE appearance_type (
    appearance_type_id TEXT PRIMARY KEY,
    name TEXT,
    category TEXT
);
"""
run_command(c2)

appearance_type = pd.read_csv('appearance_type.csv')

with sqlite3.connect('mlb.db') as conn:
    appearance_type.to_sql('appearance_type',
                           conn,
                           index=False,
                           if_exists='append')

q = """
SELECT * FROM appearance_type;
"""

run_query(q)
Out[18]:
appearance_type_id name category
0 O1 Batter 1 offense
1 O2 Batter 2 offense
2 O3 Batter 3 offense
3 O4 Batter 4 offense
4 O5 Batter 5 offense
5 O6 Batter 6 offense
6 O7 Batter 7 offense
7 O8 Batter 8 offense
8 O9 Batter 9 offense
9 D1 Pitcher defense
10 D2 Catcher defense
11 D3 1st Base defense
12 D4 2nd Base defense
13 D5 3rd Base defense
14 D6 Shortstop defense
15 D7 Left Field defense
16 D8 Center Field defense
17 D9 Right Field defense
18 D10 Unknown Position defense
19 UHP Home Plate umpire
20 U1B First Base umpire
21 U2B Second Base umpire
22 U3B Third Base umpire
23 ULF Left Field umpire
24 URF Right Field umpire
25 MM Manager manager
26 AWP Winning Pitcher award
27 ALP Losing Pitcher award
28 ASP Saving Pitcher award
29 AWB Winning RBI Batter award
30 PSP Starting Pitcher pitcher

Adding The Team and Game Tables

In [19]:
c1 = """
CREATE TABLE IF NOT EXISTS team (
    team_id TEXT PRIMARY KEY,
    league_id TEXT,
    city TEXT,
    nickname TEXT,
    franch_id TEXT,
    FOREIGN KEY (league_id) REFERENCES league(league_id)
);
"""

c2 = """
INSERT OR IGNORE INTO team
SELECT
    team_id,
    league,
    city,
    nickname,
    franch_id
FROM team_codes;
"""

q = """
SELECT * FROM team
LIMIT 5;
"""

run_command(c1)
run_command(c2)
run_query(q)
Out[19]:
team_id league_id city nickname franch_id
0 ALT UA Altoona Mountain Cities ALT
1 ARI NL Arizona Diamondbacks ARI
2 BFN NL Buffalo Bisons BFN
3 BFP PL Buffalo Bisons BFP
4 BL1 None Baltimore Canaries BL1
In [20]:
c1 = """
CREATE TABLE IF NOT EXISTS game (
    game_id TEXT PRIMARY KEY,
    date TEXT,
    number_of_game INTEGER,
    park_id TEXT,
    length_outs INTEGER,
    day BOOLEAN,
    completion TEXT,
    forefeit TEXT,
    protest TEXT,
    attendance INTEGER,
    legnth_minutes INTEGER,
    additional_info TEXT,
    acquisition_info TEXT,
    FOREIGN KEY (park_id) REFERENCES park(park_id)
);
"""

c2 = """
INSERT OR IGNORE INTO game
SELECT
    game_id,
    date,
    number_of_game,
    park_id,
    length_outs,
    CASE
        WHEN day_night = "D" THEN 1
        WHEN day_night = "N" THEN 0
        ELSE NULL
        END
        AS day,
    completion,
    forefeit,
    protest,
    attendance,
    length_minutes,
    additional_info,
    acquisition_info
FROM game_log;
"""

q = """
SELECT * FROM game
LIMIT 5;
"""

run_command(c1)
run_command(c2)
run_query(q)
Out[20]:
game_id date number_of_game park_id length_outs day completion forefeit protest attendance legnth_minutes additional_info acquisition_info
0 18710504FW10 18710504 0 FOR01 54 1 None None None 200 120 None Y
1 18710505WS30 18710505 0 WAS01 54 1 None None None 5000 145 HTBF Y
2 18710506RC10 18710506 0 RCK01 54 1 None None None 1000 140 None Y
3 18710508CH10 18710508 0 CHI01 54 1 None None None 5000 150 None Y
4 18710509TRO0 18710509 0 TRO01 54 1 None None None 3250 145 HTBF Y

Adding the Team Appearance Table

In [21]:
c1 = """
CREATE TABLE IF NOT EXISTS team_appearance (
    team_id TEXT,
    game_id TEXT,
    home BOOLEAN,
    league_id TEXT,
    score INTEGER,
    line_score TEXT,
    at_bats INTEGER,
    hits INTEGER,
    doubles INTEGER,
    triples INTEGER,
    homeruns INTEGER,
    rbi INTEGER,
    sacrifice_hits INTEGER,
    sacrifice_flies INTEGER,
    hit_by_pitch INTEGER,
    walks INTEGER,
    intentional_walks INTEGER,
    strikeouts INTEGER,
    stolen_bases INTEGER,
    caught_stealing INTEGER,
    grounded_into_double INTEGER,
    first_catcher_interference INTEGER,
    left_on_base INTEGER,
    pitchers_used INTEGER,
    individual_earned_runs INTEGER,
    team_earned_runs INTEGER,
    wild_pitches INTEGER,
    balks INTEGER,
    putouts INTEGER,
    assists INTEGER,
    errors INTEGER,
    passed_balls INTEGER,
    double_plays INTEGER,
    triple_plays INTEGER,
    PRIMARY KEY (team_id, game_id),
    FOREIGN KEY (team_id) REFERENCES team(team_id),
    FOREIGN KEY (game_id) REFERENCES game(game_id),
    FOREIGN KEY (team_id) REFERENCES team(team_id)
);
"""

run_command(c1)

c2 = """
INSERT OR IGNORE INTO team_appearance
    SELECT
        h_name,
        game_id,
        1 AS home,
        h_league,
        h_score,
        h_line_score,
        h_at_bats,
        h_hits,
        h_doubles,
        h_triples,
        h_homeruns,
        h_rbi,
        h_sacrifice_hits,
        h_sacrifice_flies,
        h_hit_by_pitch,
        h_walks,
        h_intentional_walks,
        h_strikeouts,
        h_stolen_bases,
        h_caught_stealing,
        h_grounded_into_double,
        h_first_catcher_interference,
        h_left_on_base,
        h_pitchers_used,
        h_individual_earned_runs,
        h_team_earned_runs,
        h_wild_pitches,
        h_balks,
        h_putouts,
        h_assists,
        h_errors,
        h_passed_balls,
        h_double_plays,
        h_triple_plays
    FROM game_log

UNION

    SELECT    
        v_name,
        game_id,
        0 AS home,
        v_league,
        v_score,
        v_line_score,
        v_at_bats,
        v_hits,
        v_doubles,
        v_triples,
        v_homeruns,
        v_rbi,
        v_sacrifice_hits,
        v_sacrifice_flies,
        v_hit_by_pitch,
        v_walks,
        v_intentional_walks,
        v_strikeouts,
        v_stolen_bases,
        v_caught_stealing,
        v_grounded_into_double,
        v_first_catcher_interference,
        v_left_on_base,
        v_pitchers_used,
        v_individual_earned_runs,
        v_team_earned_runs,
        v_wild_pitches,
        v_balks,
        v_putouts,
        v_assists,
        v_errors,
        v_passed_balls,
        v_double_plays,
        v_triple_plays
    from game_log;
"""

run_command(c2)

q = """
SELECT * FROM team_appearance
WHERE game_id = (
                 SELECT MIN(game_id) from game
                )
   OR game_id = (
                 SELECT MAX(game_id) from game
                )
ORDER By game_id, home;
"""

run_query(q)
Out[21]:
team_id game_id home league_id score line_score at_bats hits doubles triples homeruns rbi sacrifice_hits sacrifice_flies hit_by_pitch walks intentional_walks strikeouts stolen_bases caught_stealing grounded_into_double first_catcher_interference left_on_base pitchers_used individual_earned_runs team_earned_runs wild_pitches balks putouts assists errors passed_balls double_plays triple_plays
0 CL1 18710504FW10 0 None 0 000000000 30 4 1 0 0 0 0 0 0 1 NaN 6 1 NaN -1 NaN 4 1 1 1 0 0 27 9 0 3 0 0
1 FW1 18710504FW10 1 None 2 010010000 31 4 1 0 0 2 0 0 0 1 NaN 0 0 NaN -1 NaN 3 1 0 0 0 0 27 3 3 1 1 0
2 MIA 20161002WAS0 0 NL 7 000230020 38 14 1 1 2 7 1 0 0 3 2.0 10 1 1.0 1 0.0 8 7 10 10 1 0 24 11 0 0 1 0
3 WAS 20161002WAS0 1 NL 10 03023002x 30 10 2 0 1 10 1 1 1 8 0.0 3 2 0.0 1 0.0 7 6 7 7 1 0 27 11 0 0 1 0

Adding the Person Appearance Table

In [22]:
c0 = "DROP TABLE IF EXISTS person_appearance"

run_command(c0)

c1 = """
CREATE TABLE person_appearance (
    appearance_id INTEGER PRIMARY KEY,
    person_id TEXT,
    team_id TEXT,
    game_id TEXT,
    appearance_type_id,
    FOREIGN KEY (person_id) REFERENCES person(person_id),
    FOREIGN KEY (team_id) REFERENCES team(team_id),
    FOREIGN KEY (game_id) REFERENCES game(game_id),
    FOREIGN KEY (appearance_type_id) REFERENCES appearance_type(appearance_type_id)
);
"""

c2 = """
INSERT OR IGNORE INTO person_appearance (
    game_id,
    team_id,
    person_id,
    appearance_type_id
) 
    SELECT
        game_id,
        NULL,
        hp_umpire_id,
        "UHP"
    FROM game_log
    WHERE hp_umpire_id IS NOT NULL    

UNION

    SELECT
        game_id,
        NULL,
        [1b_umpire_id],
        "U1B"
    FROM game_log
    WHERE "1b_umpire_id" IS NOT NULL

UNION

    SELECT
        game_id,
        NULL,
        [2b_umpire_id],
        "U2B"
    FROM game_log
    WHERE [2b_umpire_id] IS NOT NULL

UNION

    SELECT
        game_id,
        NULL,
        [3b_umpire_id],
        "U3B"
    FROM game_log
    WHERE [3b_umpire_id] IS NOT NULL

UNION

    SELECT
        game_id,
        NULL,
        lf_umpire_id,
        "ULF"
    FROM game_log
    WHERE lf_umpire_id IS NOT NULL

UNION

    SELECT
        game_id,
        NULL,
        rf_umpire_id,
        "URF"
    FROM game_log
    WHERE rf_umpire_id IS NOT NULL

UNION

    SELECT
        game_id,
        v_name,
        v_manager_id,
        "MM"
    FROM game_log
    WHERE v_manager_id IS NOT NULL

UNION

    SELECT
        game_id,
        h_name,
        h_manager_id,
        "MM"
    FROM game_log
    WHERE h_manager_id IS NOT NULL

UNION

    SELECT
        game_id,
        CASE
            WHEN h_score > v_score THEN h_name
            ELSE v_name
            END,
        winning_pitcher_id,
        "AWP"
    FROM game_log
    WHERE winning_pitcher_id IS NOT NULL

UNION

    SELECT
        game_id,
        CASE
            WHEN h_score < v_score THEN h_name
            ELSE v_name
            END,
        losing_pitcher_id,
        "ALP"
    FROM game_log
    WHERE losing_pitcher_id IS NOT NULL

UNION

    SELECT
        game_id,
        CASE
            WHEN h_score > v_score THEN h_name
            ELSE v_name
            END,
        saving_pitcher_id,
        "ASP"
    FROM game_log
    WHERE saving_pitcher_id IS NOT NULL

UNION

    SELECT
        game_id,
        CASE
            WHEN h_score > v_score THEN h_name
            ELSE v_name
            END,
        winning_rbi_batter_id,
        "AWB"
    FROM game_log
    WHERE winning_rbi_batter_id IS NOT NULL

UNION

    SELECT
        game_id,
        v_name,
        v_starting_pitcher_id,
        "PSP"
    FROM game_log
    WHERE v_starting_pitcher_id IS NOT NULL

UNION

    SELECT
        game_id,
        h_name,
        h_starting_pitcher_id,
        "PSP"
    FROM game_log
    WHERE h_starting_pitcher_id IS NOT NULL;
"""

template = """
INSERT INTO person_appearance (
    game_id,
    team_id,
    person_id,
    appearance_type_id
) 
    SELECT
        game_id,
        {hv}_name,
        {hv}_player_{num}_id,
        "O{num}"
    FROM game_log
    WHERE {hv}_player_{num}_id IS NOT NULL

UNION

    SELECT
        game_id,
        {hv}_name,
        {hv}_player_{num}_id,
        "D" || CAST({hv}_player_{num}_def_pos AS INT)
    FROM game_log
    WHERE {hv}_player_{num}_id IS NOT NULL;
"""

run_command(c1)
run_command(c2)

for hv in ["h","v"]:
    for num in range(1,10):
        query_vars = {
            "hv": hv,
            "num": num
        }
        run_command(template.format(**query_vars))
In [23]:
print(run_query("SELECT COUNT(DISTINCT game_id) games_game FROM game"))
print(run_query("SELECT COUNT(DISTINCT game_id) games_person_appearance FROM person_appearance"))

q = """
SELECT
    pa.*,
    at.name,
    at.category
FROM person_appearance pa
INNER JOIN appearance_type at on at.appearance_type_id = pa.appearance_type_id
WHERE PA.game_id = (
                   SELECT max(game_id)
                    FROM person_appearance
                   )
ORDER BY team_id, appearance_type_id
"""

run_query(q)
   games_game
0      171907
   games_person_appearance
0                   171907
Out[23]:
appearance_id person_id team_id game_id appearance_type_id name category
0 1646109 porta901 None 20161002WAS0 U1B First Base umpire
1 1646108 onorb901 None 20161002WAS0 U2B Second Base umpire
2 1646107 kellj901 None 20161002WAS0 U3B Third Base umpire
3 1646110 tumpj901 None 20161002WAS0 UHP Home Plate umpire
4 1646111 brica001 MIA 20161002WAS0 ALP Losing Pitcher award
5 6716279 koeht001 MIA 20161002WAS0 D1 Pitcher defense
6 4744553 telit001 MIA 20161002WAS0 D2 Catcher defense
7 5589581 bourj002 MIA 20161002WAS0 D3 1st Base defense
8 4462877 gordd002 MIA 20161002WAS0 D4 2nd Base defense
9 5026229 pradm001 MIA 20161002WAS0 D5 3rd Base defense
10 6434609 hecha001 MIA 20161002WAS0 D6 Shortstop defense
11 5871257 scrux001 MIA 20161002WAS0 D7 Left Field defense
12 5307905 yelic001 MIA 20161002WAS0 D8 Center Field defense
13 6152933 hoodd001 MIA 20161002WAS0 D9 Right Field defense
14 1646113 mattd001 MIA 20161002WAS0 MM Manager manager
15 4462878 gordd002 MIA 20161002WAS0 O1 Batter 1 offense
16 4744554 telit001 MIA 20161002WAS0 O2 Batter 2 offense
17 5026230 pradm001 MIA 20161002WAS0 O3 Batter 3 offense
18 5307906 yelic001 MIA 20161002WAS0 O4 Batter 4 offense
19 5589582 bourj002 MIA 20161002WAS0 O5 Batter 5 offense
20 5871258 scrux001 MIA 20161002WAS0 O6 Batter 6 offense
21 6152934 hoodd001 MIA 20161002WAS0 O7 Batter 7 offense
22 6434610 hecha001 MIA 20161002WAS0 O8 Batter 8 offense
23 6716280 koeht001 MIA 20161002WAS0 O9 Batter 9 offense
24 1646112 koeht001 MIA 20161002WAS0 PSP Starting Pitcher pitcher
25 1646116 melam001 WAS 20161002WAS0 ASP Saving Pitcher award
26 1646115 difow001 WAS 20161002WAS0 AWB Winning RBI Batter award
27 1646117 schem001 WAS 20161002WAS0 AWP Winning Pitcher award
28 4181201 schem001 WAS 20161002WAS0 D1 Pitcher defense
29 3899525 lobaj001 WAS 20161002WAS0 D2 Catcher defense
30 2772821 zimmr001 WAS 20161002WAS0 D3 1st Base defense
31 3336173 difow001 WAS 20161002WAS0 D4 2nd Base defense
32 3054497 drews001 WAS 20161002WAS0 D5 3rd Base defense
33 3617849 espid001 WAS 20161002WAS0 D6 Shortstop defense
34 2209469 reveb001 WAS 20161002WAS0 D7 Left Field defense
35 1927793 turnt001 WAS 20161002WAS0 D8 Center Field defense
36 2491145 harpb003 WAS 20161002WAS0 D9 Right Field defense
37 1646114 baked002 WAS 20161002WAS0 MM Manager manager
38 1927794 turnt001 WAS 20161002WAS0 O1 Batter 1 offense
39 2209470 reveb001 WAS 20161002WAS0 O2 Batter 2 offense
40 2491146 harpb003 WAS 20161002WAS0 O3 Batter 3 offense
41 2772822 zimmr001 WAS 20161002WAS0 O4 Batter 4 offense
42 3054498 drews001 WAS 20161002WAS0 O5 Batter 5 offense
43 3336174 difow001 WAS 20161002WAS0 O6 Batter 6 offense
44 3617850 espid001 WAS 20161002WAS0 O7 Batter 7 offense
45 3899526 lobaj001 WAS 20161002WAS0 O8 Batter 8 offense
46 4181202 schem001 WAS 20161002WAS0 O9 Batter 9 offense
47 1646118 schem001 WAS 20161002WAS0 PSP Starting Pitcher pitcher

Removing the Original Tables

In [24]:
show_tables()
Out[24]:
name type
0 game_log table
1 park_codes table
2 team_codes table
3 person_codes table
4 person table
5 park table
6 league table
7 appearance_type table
8 team table
9 game table
10 team_appearance table
11 person_appearance table
In [25]:
tables = [
    "game_log",
    "park_codes",
    "team_codes",
    "person_codes"
]

for t in tables:
    c = '''
    DROP TABLE {}
    '''.format(t)
    
    run_command(c)

show_tables()
Out[25]:
name type
0 person table
1 park table
2 league table
3 appearance_type table
4 team table
5 game table
6 team_appearance table
7 person_appearance table